1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.solr.search.join;
19  
20  import java.io.IOException;
21  import java.lang.invoke.MethodHandles;
22  import java.util.ArrayList;
23  import java.util.Collection;
24  import java.util.Collections;
25  import java.util.HashMap;
26  import java.util.HashSet;
27  import java.util.LinkedHashMap;
28  import java.util.List;
29  import java.util.Map;
30  import java.util.Set;
31  
32  import org.apache.lucene.search.Query;
33  import org.apache.lucene.search.join.ScoreMode;
34  import org.apache.solr.JSONTestUtil;
35  import org.apache.solr.SolrTestCaseJ4;
36  import org.apache.solr.common.params.MapSolrParams;
37  import org.apache.solr.request.SolrQueryRequest;
38  import org.apache.solr.request.SolrRequestInfo;
39  import org.apache.solr.response.SolrQueryResponse;
40  import org.apache.solr.search.JoinQParserPlugin;
41  import org.apache.solr.search.QParser;
42  import org.apache.solr.search.SyntaxError;
43  import org.junit.BeforeClass;
44  import org.junit.Test;
45  import org.noggit.JSONUtil;
46  import org.noggit.ObjectBuilder;
47  import org.slf4j.Logger;
48  import org.slf4j.LoggerFactory;
49  
50  public class TestScoreJoinQPNoScore extends SolrTestCaseJ4 {
51  
52    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
53  
54    @BeforeClass
55    public static void beforeTests() throws Exception {
56      System.setProperty("enable.update.log", "false"); // schema12 doesn't support _version_
57      initCore("solrconfig-basic.xml","schema-docValuesJoin.xml");
58    }
59  
60    @Test
61    public void testJoin() throws Exception {
62      assertU(add(doc("id", "1","name_s", "john", "title_s", "Director", "dept_ss","Engineering")));
63      assertU(add(doc("id", "2","name_s", "mark", "title_s", "VP", "dept_ss","Marketing")));
64      assertU(add(doc("id", "3","name_s", "nancy", "title_s", "MTS", "dept_ss","Sales")));
65      assertU(add(doc("id", "4","name_s", "dave", "title_s", "MTS", "dept_ss","Support", "dept_ss","Engineering")));
66      assertU(add(doc("id", "5","name_s", "tina", "title_s", "VP", "dept_ss","Engineering")));
67  
68      assertU(add(doc("id","10", "dept_id_s", "Engineering", "text_t","These guys develop stuff")));
69      assertU(add(doc("id","11", "dept_id_s", "Marketing", "text_t","These guys make you look good")));
70      assertU(add(doc("id","12", "dept_id_s", "Sales", "text_t","These guys sell stuff")));
71      assertU(add(doc("id","13", "dept_id_s", "Support", "text_t","These guys help customers")));
72  
73      assertU(commit());
74  
75      // test debugging TODO no debug in JoinUtil
76    //  assertJQ(req("q","{!join from=dept_ss to=dept_id_s"+whateverScore()+"}title_s:MTS", "fl","id", "debugQuery","true")
77    //      ,"/debug/join/{!join from=dept_ss to=dept_id_s"+whateverScore()+"}title_s:MTS=={'_MATCH_':'fromSetSize,toSetSize', 'fromSetSize':2, 'toSetSize':3}"
78    //  );
79  
80      assertJQ(req("q","{!join from=dept_ss to=dept_id_s"+whateverScore()+"}title_s:MTS", "fl","id")
81          ,"/response=={'numFound':3,'start':0,'docs':[{'id':'10'},{'id':'12'},{'id':'13'}]}"
82      );
83  
84      // empty from
85      assertJQ(req("q","{!join from=noexist_s to=dept_id_s"+whateverScore()+"}*:*", "fl","id")
86          ,"/response=={'numFound':0,'start':0,'docs':[]}"
87      );
88  
89      // empty to
90      assertJQ(req("q","{!join from=dept_ss to=noexist_s"+whateverScore()+"}*:*", "fl","id")
91          ,"/response=={'numFound':0,'start':0,'docs':[]}"
92      );
93  
94      // self join... return everyone with she same title as Dave
95      assertJQ(req("q","{!join from=title_s to=title_s"+whateverScore()+"}name_s:dave", "fl","id")
96          ,"/response=={'numFound':2,'start':0,'docs':[{'id':'3'},{'id':'4'}]}"
97      );
98  
99      // find people that develop stuff
100     assertJQ(req("q","{!join from=dept_id_s to=dept_ss"+whateverScore()+"}text_t:develop", "fl","id")
101         ,"/response=={'numFound':3,'start':0,'docs':[{'id':'1'},{'id':'4'},{'id':'5'}]}"
102     );
103 
104     // self join on multivalued text_t field
105     assertJQ(req("q","{!join from=title_s to=title_s"+whateverScore()+"}name_s:dave", "fl","id")
106         ,"/response=={'numFound':2,'start':0,'docs':[{'id':'3'},{'id':'4'}]}"
107     );
108 
109     assertJQ(req("q","{!join from=dept_ss to=dept_id_s"+whateverScore()+"}title_s:MTS", "fl","id", "debugQuery","true")
110         ,"/response=={'numFound':3,'start':0,'docs':[{'id':'10'},{'id':'12'},{'id':'13'}]}"
111     );
112     
113     // expected outcome for a sub query matching dave joined against departments
114     final String davesDepartments = 
115       "/response=={'numFound':2,'start':0,'docs':[{'id':'10'},{'id':'13'}]}";
116 
117     // straight forward query
118     assertJQ(req("q","{!join from=dept_ss to=dept_id_s"+whateverScore()+"}name_s:dave", 
119                  "fl","id"),
120              davesDepartments);
121 
122     // variable deref for sub-query parsing
123     assertJQ(req("q","{!join from=dept_ss to=dept_id_s v=$qq"+whateverScore()+"}", 
124                  "qq","{!dismax}dave",
125                  "qf","name_s",
126                  "fl","id", 
127                  "debugQuery","true"),
128              davesDepartments);
129 
130     // variable deref for sub-query parsing w/localparams
131     assertJQ(req("q","{!join from=dept_ss to=dept_id_s v=$qq"+whateverScore()+"}", 
132                  "qq","{!dismax qf=name_s}dave",
133                  "fl","id", 
134                  "debugQuery","true"),
135              davesDepartments);
136 
137     // defType local param to control sub-query parsing
138     assertJQ(req("q","{!join from=dept_ss to=dept_id_s defType=dismax"+whateverScore()+"}dave", 
139                  "qf","name_s",
140                  "fl","id", 
141                  "debugQuery","true"),
142              davesDepartments);
143 
144     // find people that develop stuff - but limit via filter query to a name of "john"
145     // this tests filters being pushed down to queries (SOLR-3062)
146     assertJQ(req("q","{!join from=dept_id_s to=dept_ss"+whateverScore()+"}text_t:develop", "fl","id", "fq", "name_s:john")
147              ,"/response=={'numFound':1,'start':0,'docs':[{'id':'1'}]}"
148             );
149     
150 
151    assertJQ(req("q","{!join from=dept_ss to=dept_id_s"+whateverScore()+"}title_s:MTS", "fl","id"
152           )
153           ,"/response=={'numFound':3,'start':0,'docs':[{'id':'10'},{'id':'12'},{'id':'13'}]}");
154 
155       // find people that develop stuff, even if it's requested as single value
156     assertJQ(req("q","{!join from=dept_id_s to=dept_ss"+whateverScore()+"}text_t:develop", "fl","id")
157         ,"/response=={'numFound':3,'start':0,'docs':[{'id':'1'},{'id':'4'},{'id':'5'}]}"
158     );
159 
160   }
161 
162   public void testJoinQueryType() throws SyntaxError, IOException{
163     SolrQueryRequest req = null;
164     try{
165       final String score = whateverScore();
166       
167       req = req("{!join from=dept_id_s to=dept_ss"+score+"}text_t:develop");
168       SolrQueryResponse rsp = new SolrQueryResponse();
169       SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
170       
171       {
172         final Query query = QParser.getParser(req.getParams().get("q"), null, req).getQuery();
173         final Query rewrittenQuery = query.rewrite(req.getSearcher().getIndexReader());
174         assertTrue(
175             rewrittenQuery+" should be Lucene's",
176             rewrittenQuery.getClass().getPackage().getName()
177             .startsWith("org.apache.lucene"));
178       }
179       {
180         final Query query = QParser.getParser(
181             "{!join from=dept_id_s to=dept_ss}text_t:develop"
182             , null, req).getQuery();
183         final Query rewrittenQuery = query.rewrite(req.getSearcher().getIndexReader());
184         assertEquals(rewrittenQuery+" is expected to be from Solr",
185               JoinQParserPlugin.class.getPackage().getName(), 
186               rewrittenQuery.getClass().getPackage().getName());
187       }
188     }finally{
189       if(req!=null){
190         req.close();
191       }
192       SolrRequestInfo.clearRequestInfo();
193     }
194   }
195 
196   public static String whateverScore() {
197       final ScoreMode[] vals = ScoreMode.values();
198       return " score="+vals[random().nextInt(vals.length)]+" ";
199   }
200 
201   @Test
202   public void testRandomJoin() throws Exception {
203     int indexIter=50 * RANDOM_MULTIPLIER;
204     int queryIter=50 * RANDOM_MULTIPLIER;
205 
206     // groups of fields that have any chance of matching... used to
207     // increase test effectiveness by avoiding 0 resultsets much of the time.
208     String[][] compat = new String[][] {
209         {"small_s_dv","small2_s_dv","small2_ss_dv","small3_ss_dv"},
210         {"small_i_dv","small2_i_dv","small2_is_dv","small3_is_dv"}
211     };
212 
213 
214     while (--indexIter >= 0) {
215       int indexSize = random().nextInt(20 * RANDOM_MULTIPLIER);
216 
217       List<FldType> types = new ArrayList<FldType>();
218       types.add(new FldType("id",ONE_ONE, new SVal('A','Z',4,4)));
219       /** no numeric fields so far LUCENE-5868
220       types.add(new FldType("score_f_dv",ONE_ONE, new FVal(1,100)));  // field used to score
221       **/
222       types.add(new FldType("small_s_dv",ZERO_ONE, new SVal('a',(char)('c'+indexSize/3),1,1)));
223       types.add(new FldType("small2_s_dv",ZERO_ONE, new SVal('a',(char)('c'+indexSize/3),1,1)));
224       types.add(new FldType("small2_ss_dv",ZERO_TWO, new SVal('a',(char)('c'+indexSize/3),1,1)));
225       types.add(new FldType("small3_ss_dv",new IRange(0,25), new SVal('A','z',1,1)));
226       /** no numeric fields so far LUCENE-5868
227       types.add(new FldType("small_i_dv",ZERO_ONE, new IRange(0,5+indexSize/3)));
228       types.add(new FldType("small2_i_dv",ZERO_ONE, new IRange(0,5+indexSize/3)));
229       types.add(new FldType("small2_is_dv",ZERO_TWO, new IRange(0,5+indexSize/3)));
230       types.add(new FldType("small3_is_dv",new IRange(0,25), new IRange(0,100)));
231       **/
232 
233       clearIndex();
234       Map<Comparable, Doc> model = indexDocs(types, null, indexSize);
235       Map<String, Map<Comparable, Set<Comparable>>> pivots = new HashMap<String, Map<Comparable, Set<Comparable>>>();
236 
237       for (int qiter=0; qiter<queryIter; qiter++) {
238         String fromField;
239         String toField;
240         if (random().nextInt(100) < 5) {
241           // pick random fields 5% of the time
242           fromField = types.get(random().nextInt(types.size())).fname;
243           // pick the same field 50% of the time we pick a random field (since other fields won't match anything)
244           toField = (random().nextInt(100) < 50) ? fromField : types.get(random().nextInt(types.size())).fname;
245         } else {
246           // otherwise, pick compatible fields that have a chance of matching indexed tokens
247           String[] group = compat[random().nextInt(compat.length)];
248           fromField = group[random().nextInt(group.length)];
249           toField = group[random().nextInt(group.length)];
250         }
251 
252         Map<Comparable, Set<Comparable>> pivot = pivots.get(fromField+"/"+toField);
253         if (pivot == null) {
254           pivot = createJoinMap(model, fromField, toField);
255           pivots.put(fromField+"/"+toField, pivot);
256         }
257 
258         Collection<Doc> fromDocs = model.values();
259         Set<Comparable> docs = join(fromDocs, pivot);
260         List<Doc> docList = new ArrayList<Doc>(docs.size());
261         for (Comparable id : docs) docList.add(model.get(id));
262         Collections.sort(docList, createComparator("_docid_",true,false,false,false));
263         List sortedDocs = new ArrayList();
264         for (Doc doc : docList) {
265           if (sortedDocs.size() >= 10) break;
266           sortedDocs.add(doc.toObject(h.getCore().getLatestSchema()));
267         }
268 
269         Map<String,Object> resultSet = new LinkedHashMap<String,Object>();
270         resultSet.put("numFound", docList.size());
271         resultSet.put("start", 0);
272         resultSet.put("docs", sortedDocs);
273 
274         // todo: use different join queries for better coverage
275 
276         SolrQueryRequest req = req("wt","json","indent","true", "echoParams","all",
277             "q","{!join from="+fromField+" to="+toField
278                 +" "+ (random().nextBoolean() ? "fromIndex=collection1" : "")
279                 +" "+ (random().nextBoolean() ? "TESTenforceSameCoreAsAnotherOne=true" : "")
280                 +" "+whateverScore()+"}*:*"
281                 , "sort", "_docid_ asc"
282         );
283 
284         String strResponse = h.query(req);
285 
286         Object realResponse = ObjectBuilder.fromJSON(strResponse);
287         String err = JSONTestUtil.matchObj("/response", realResponse, resultSet);
288         if (err != null) {
289           final String m = "JOIN MISMATCH: " + err
290            + "\n\trequest="+req
291            + "\n\tresult="+strResponse
292            + "\n\texpected="+ JSONUtil.toJSON(resultSet)
293           ;// + "\n\tmodel="+ JSONUtil.toJSON(model);
294           log.error(m);
295           {
296             SolrQueryRequest f = req("wt","json","indent","true", "echoParams","all",
297               "q","*:*", "facet","true",
298               "facet.field", fromField 
299                   , "sort", "_docid_ asc"
300                   ,"rows","0"
301                 );
302             log.error("faceting on from field: "+h.query(f));
303           }
304           {
305             final Map<String,String> ps = ((MapSolrParams)req.getParams()).getMap();
306             final String q = ps.get("q");
307             ps.put("q", q.replaceAll("join score=none", "join"));
308             log.error("plain join: "+h.query(req));
309             ps.put("q", q);
310             
311           }
312           {
313           // re-execute the request... good for putting a breakpoint here for debugging
314           final Map<String,String> ps = ((MapSolrParams)req.getParams()).getMap();
315           final String q = ps.get("q");
316           ps.put("q", q.replaceAll("\\}", " cache=false\\}"));
317           String rsp = h.query(req);
318           }
319           fail(err);
320         }
321 
322       }
323     }
324   }
325 
326   Map<Comparable, Set<Comparable>> createJoinMap(Map<Comparable, Doc> model, String fromField, String toField) {
327     Map<Comparable, Set<Comparable>> id_to_id = new HashMap<Comparable, Set<Comparable>>();
328 
329     Map<Comparable, List<Comparable>> value_to_id = invertField(model, toField);
330 
331     for (Comparable fromId : model.keySet()) {
332       Doc doc = model.get(fromId);
333       List<Comparable> vals = doc.getValues(fromField);
334       if (vals == null) continue;
335       for (Comparable val : vals) {
336         List<Comparable> toIds = value_to_id.get(val);
337         if (toIds == null) continue;
338         Set<Comparable> ids = id_to_id.get(fromId);
339         if (ids == null) {
340           ids = new HashSet<Comparable>();
341           id_to_id.put(fromId, ids);
342         }
343         for (Comparable toId : toIds)
344           ids.add(toId);
345       }
346     }
347 
348     return id_to_id;
349   }
350 
351 
352   Set<Comparable> join(Collection<Doc> input, Map<Comparable, Set<Comparable>> joinMap) {
353     Set<Comparable> ids = new HashSet<Comparable>();
354     for (Doc doc : input) {
355       Collection<Comparable> output = joinMap.get(doc.id);
356       if (output == null) continue;
357       ids.addAll(output);
358     }
359     return ids;
360   }
361 
362 }